// Start from an empty session and record which platform we are on.
clear all
global system linux

// Platform-specific root directory and path separator.
// Only the linux case is configured here; on any other value of $system
// the globals `code' and `s' are left undefined by this section.
if "$system" == "linux" {
	global code "/"
	global s "/"
}

// Run the shared path-setup script, then move into the project folder.
// NOTE(review): ${hpc}, used when saving the output, is presumably defined
// inside _set-path.do -- confirm.
run "${code}${s}_set-path.do"
cd "${code}/tba_elig"

***** Clean data

// Load only rate locks dated 2013 or earlier.
// -use- takes the option -clear- (discard data in memory); -replace- is not
// a valid option for -use- and makes the command fail with a syntax error.
use "optimal_blue_locks.dta" if year(lockdate)<=2013, clear


// Calendar year of the lock date.
gen year = year(lockdate)

// Sample restrictions. The numeric codes are the dataset's own category
// codes; the meanings given on each line come from the original annotations.
keep if loanterm == 136 // 30 year
keep if amortizationtype == 133 // fixed
keep if purpose == 106 // purchase loans
keep if occupancy == 2 // primary residence
keep if denied == 0
keep if numofunits == 123 // 1 unit
keep if businesschanneltype == "Retail"
keep if propertytype == 115 // single family
keep if ltv <= 80

drop if inlist(loantype, 129, 130, 361) // drop FHA, VA, USDA

// Indicator equal to 1 when loantype is 127, 0 otherwise.
gen gse_id = loantype == 127

// Drop any remaining observation whose product-set name contains one of
// these tokens followed by a space (the trailing space is part of the
// pattern, so a token at the very end of the name is not matched).
foreach x in ARM FHA VA GNMA USDA {
	drop if regexm(embs_productset_name, "`x' ") 
}


// merge with CLL
// The lookup file is keyed on fipscode, so rename the master key to match.
rename fips fipscode 

// Many-to-one merge on fipscode. Only observations found in both files are
// kept, and no _merge variable is created: keep(match) nogenerate replaces
// the former keep(master matched) + keep if _merge==3 + drop _merge sequence.
merge m:1 fipscode using "cll_arra.dta", keep(match) nogenerate

keep if cll > 467000 // keep only high-cost area
drop if inlist(state_id, "PR", "AK", "HI", "VI")

// Drop loans larger than the largest cll value remaining in the sample.
// Observations with missing loanamount are dropped too, because missing
// compares greater than any number in Stata.
su cll
drop if loanamount > `r(max)'

// reg- plots

// Indicator: loan amount above 417,000.
gen HiBal = loanamount >  417000
// Purchase price relative to 1.25 * 417,000, expressed in thousands.
gen PaboveC = (purchaseprice - 417000*1.25)/ 1000
// Indicator: purchase price above that threshold.
gen HiP = PaboveC > 0
gen point = 100 - price

// Polynomial terms (linear, quadratic, cubic) of the running variable.
gen PaboveC1 = PaboveC
gen PaboveC2 = PaboveC * PaboveC 
gen PaboveC3= PaboveC2* PaboveC
 
// Monthly date of the lock.
gen month = mofd(lockdate)


// Lock-period buckets: up to 30 days, and more than 30 up to 60 days.
gen lockperiod30 = lockperiod <= 30
gen lockperiod60 = lockperiod > 30 & lockperiod <= 60


// Bin the running variable by rounding up to the next multiple of 2.5.
gen bin = ceil(PaboveC/2.5)*2.5
// The dollar sign is escaped so it is always stored literally and can never
// be parsed as the start of a global-macro reference inside the quotes.
// NOTE(review): the label says "Appraisal Value" while PaboveC is built from
// purchaseprice -- confirm the intended wording.
label var bin "Appraisal Value - 1.25 CLL (\$1000)"

// LLPA bins
// NOTE(review): _LLPAbins.do is presumably what creates credit_score_bin and
// ltv_bin from the two variables passed as arguments -- confirm.
qui do "${code}/tba_elig/_LLPAbins.do" fico ltv
egen LLPAbin = group(credit_score_bin ltv_bin)

// Running variable interacted with the above-threshold indicator.
gen slope = PaboveC * HiP
// Restrict to a +/-150 (thousand) window around the threshold; observations
// with missing PaboveC are dropped as well.
keep if abs(PaboveC) <= 150


// Trim outliers: for each variable, values outside its own 1st-99th
// percentile range are set to missing (they are not clamped to the
// percentile values, so this is trimming rather than winsorizing in the
// strict sense).
foreach v of varlist point rate {
	quietly summarize `v', detail
	replace `v' = . if !inrange(`v', `r(p1)', `r(p99)')
}


// FEs
// Lender identifier copied from customerhierarchy_index.
gen lender=customerhierarchy_index

// Keep only the analysis variables. LLPAbin is written out in full rather
// than abbreviated as LLPA, so the command does not depend on variable
// abbreviation being enabled or on LLPA being an unambiguous prefix.
keep HiBal point rate HiP slope PaboveC* lockperiod* fico zip lockdate LLPAbin lender gse_id year bin loanamount purchaseprice

// NOTE(review): ${hpc} is not defined in this file; presumably it is set by
// _set-path.do -- confirm before running.
save "${hpc}/ob-cll-cleaned.dta", replace

